source("./all_artifacts.r")
## Rows: 396 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): BhM1, BhM10, BhM2, BhM3, BhM4, BhM5, BhM6, BhM7, BhM8, BhM9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 425 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): BrH1, BrH10, BrH2, BrH3, BrH4, BrH5, BrH6, BrH7, BrH8, BrH9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 427 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): BrL1, BrL10, BrL2, BrL3, BrL4, BrL5, BrL6, BrL7, BrL8, BrL9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 343 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): CaS1, CaS10, CaS2, CaS3, CaS4, CaS5, CaS6, CaS7, CaS8, CaS9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 352 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): CeY1, CeY10, CeY2, CeY3, CeY4, CeY5, CeY6, CeY7, CeY8, CeY9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 293 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): GrI1, GrI10, GrI2, GrI3, GrI4, GrI5, GrI6, GrI7, GrI8, GrI9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 378 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): EaI1, EaI10, EaI2, EaI3, EaI4, EaI5, EaI6, EaI7, EaI8, EaI9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 342 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): CrC1, CrC10, CrC2, CrC3, CrC4, CrC5, CrC6, CrC7, CrC8, CrC9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 401 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): HaS1, HaS10, HaS2, HaS3, HaS4, HaS5, HaS6, HaS7, HaS8, HaS9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 347 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): NzS1, NzS10, NzS2, NzS3, NzS4, NzS5, NzS6, NzS7, NzS8, NzS9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 398 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): SvG1, SvG10, SvG2, SvG3, SvG4, SvG5, SvG6, SvG7, SvG8, SvG9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 382 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): StR1, StR10, StR2, StR3, StR4, StR5, StR6, StR7, StR8, StR9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 374 Columns: 11
## ── Column specification ──────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): pathway
## dbl (10): ViS1, ViS10, ViS2, ViS3, ViS4, ViS5, ViS6, ViS7, ViS8, ViS9
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Qiime2 uses a compressed type of file format called an ‘Artifact’ for its analyses. Artifacts have different semantic types e.g. FeatureData[Sequence], Phylogeny[Unrooted] depending on the type of data they contain. To begin the analysis, I need to import my fastq files into FeatureData[SequencesWithQuality] or FeatureData[PairedEndSequencesWithQuality].
Although all of these reads were prepared with Illumina devices, sequencing quality can vary between sequencing centres, meaning that each sample will likely need specific parameters for cleaning. Furthermore, two of the samples were sequenced with paired-end format. This means that I’ll need to import each sample into different artifacts, merging them together once they have been cleaned. To reduce bias in sampling between sites, I have specifically looked for larger fastq files, then split them
Quality \((Q)\) is commonly measured in Phred scores, denoted as \(Q=-10 \log_{10}P\), where \(P\) is the probability of an incorrect base call. Therefore, higher values for Phred indicate a lower probability of an erroneous base. Every base position is given a Phred score, and it is common to see the score decrease the longer the read
# Relevant commands
qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path devon.tsv \
--output-path devonFQ.qza \
--input-format PairedEndFastqManifestPhred64V2
qiime tools import \
--type 'SampleData[SequencesWithQuality]' \
--input-path neem.tsv \
--output-path neem.qza \
--input-format SingleEndFastqManifestPhred33V2
The data I downloaded from the biological databases are in FastQ format, containing only the sequences of the reads with their quality scores from the sequencing machine. There were thirteen locations total, with ten samples from each
Two databases were obtained:
The MicFunPred and NCBI databases were obtained in the standard BLAST format, and need to be converted into compatible data types for import into the qiime2 workflow. Specficially, I needed to convert them into FASTA format with an associated taxonomy mapping file (in HeaderlessTSVTaxonomyFormat)
# 1
blastdbcmd -db NCBI_16S/16S_ribosomal_RNA -entry all > NCBI_16S.fasta
blastdbcmd -db micfun/micfun16S -entry all > micfun.fasta
# 2
grep '>' NCBI_16S.fasta | tr -d '>' | sed 's/ /\t/' | sed 's/ /_/g' > NCBI_16SID.txt
grep '>' micfun.fasta | tr -d '>' | sed 's/_/\t/' | sort | uniq > micfunID.txt # Unfortunately several of the headers repeat
# 3
cat micfun.fasta | sed 's/_.*//' > micfunID.fasta
cat NCBI_16S.fasta | sed 's/ .*//' > ncbi16sID.fasta
# 4
cat micfunID.txt NCBI_16SID.txt EzBioCloud/ezbiocloud_id_taxonomy.txt > all_mappings.txt
cat EzBioCloud/ezbiocloud_qiime_full.fasta ncbi16sID.fasta micfun.fasta > all.fasta
qiime tools import --type FeatureData[Taxonomy] --input-format HeaderlessTSVTaxonomyFormat --input-path all.fasta --output-path all_fasta.qza
qiime tools import --type FeatureData[Taxonomy] --input-format HeaderlessTSVTaxonomyFormat --input-path all_uniqIDs.txt --output-path Ids.qza
# Script for removing redundant ids
from Bio import SeqIO
import csv
exists: set = set()
mapped = open('all_uniq.fasta', 'w+')
for seq in SeqIO.parse('all.fasta', 'fasta'):
if seq.id in exists:
continue
exists.add(seq.id)
mapped.write(f'>{seq.id}\n')
mapped.write(f'{seq.seq}\n')
mapped.close
uniq = open('all_uniqIDs.txt', 'w+')
exists2: set = set()
with open('all_mappings.txt', 'r') as i:
for id in csv.reader(i, delimiter='\t'):
if id[0] in exists2:
continue
exists2.add(id[0])
uniq.write(f'{id[0]}\t{id[1]}\n')
uniq.close
The pipeline I made for processing the reads first takes has several quality control steps, including trimming and contaminant removal.
Afterwards, it combines reads using a 99% similarity threshold into operational taxonomic units or OTUs. The idea is that because every otu is unique, they should represent an indvidual species
workflow
The end results of the pipeline used in the upcoming analyses are the OTU frequency tables, taxonomic classifications of each OTU, frequency tables of biological functions predicted from the OTUs and finally phylogenetic trees, which describe the evolutionary relationship of each OTU.
For sample taxonomic classification, I will be trying out all three of the methods available in qiime2:
blast <- lapply(
get_artifact_data("./results/3-Classified", id_key, "BLAST_All"),
parse_taxonomy
)
sklearn <- lapply(
get_artifact_data("./results/3-Classified", id_key, "Sklearn"),
parse_taxonomy
)
sk_merged <- read_qza("./results/3-Classified/Merged-Sklearn.qza")$data %>%
parse_taxonomy()
count_identified(sklearn, "Sklearn")
## Sklearn: 0.584407164275873
count_identified(blast, "BLAST")
## BLAST: 0.579451912055851
sk_merged
The sklearn classifier has a slightly better number of identifications so will be used for all any downstream analyses
Definition: Alpha diversity describes the biodiversity within a site, how many different species are there.
The first thing I did was test the alpha diversity
alpha <- get_artifact_data("./results/7-Diversity",
id_key,
extension = "",
metric_list = alpha_metrics
)
pi_evenness <- data.frame(row.names = 1:10)
for (site in names(id_key)) {
pi_evenness[[id_key[[site]]]] <- alpha[[site]]$pi[, 1]
}
shannon <- data.frame(row.names = 1:10)
for (site in names(id_key)) {
shannon[[id_key[[site]]]] <- alpha[[site]]$sh[, 1]
}
even_plot <- pi_evenness %>%
gather() %>%
ggplot(aes(x = key, y = value, fill = key)) +
geom_boxplot() +
guides(fill = "none") +
labs(x = "Site", y = "Evenness", title = "Pielou evenness")
entropy_plot <- shannon %>%
gather() %>%
ggplot(aes(x = value, y = key, fill = key)) +
ggridges::geom_density_ridges2() +
guides(fill = "none") +
labs(y = "Site", x = "Shannon Entropy")
even_plot
entropy_plot
## Picking joint bandwidth of 0.163
It’s pretty clear that some sites have much lower evenness than others, indicating that they are consist mainly of a few species.
For the set to the left over here, from the left to Catriona snow, I used the Kruskal wallis test and found that there is no statistically significant difference in site evenness in this group at least.
From the box plot, we can see a few of the sites having similar evenness, particularly the Barrow mountain sites, Bihor mountains and Catriona snow. Whether or not the difference in Pielou evenness is statistically significant can be tested with Kruskal wallis
pi_evenness %>%
select(c(
"Barrow mountain high", "Barrow mountain low",
"Bihor mountains", "Catriona snow"
)) %>%
kruskal.test()
##
## Kruskal-Wallis rank sum test
##
## data: .
## Kruskal-Wallis chi-squared = 3.8824, df = 3, p-value = 0.2744
Beta diversity: Determining the distance/dissimilarity between sites, measured on a scale of 0 (identical) to 1 (completely different).
otu_freqs <- lapply(
get_artifact_data("./results/2-OTUs", id_key, "otuFreqs"),
as.data.frame
)
fasttree <- get_artifact_data(
"./results/6-RootedTrees", id_key,
"FastTree_RootedTree"
)
iqtree <- get_artifact_data(
"./results/6-RootedTrees", id_key,
"IQTREE_RootedTree"
)
matched <- match(iqtree$GrI$tip.label, rownames(otu_freqs$GrI))
freq_mapping <- otu_freqs$GrI[matched, ] %>%
rowMeans() %>%
as.data.frame() %>%
`rownames<-`(seq_along(rownames(.)) %>% paste("OTU", ., sep = ""))
iqtree$GrI$tip.label <- paste(rownames(freq_mapping), "freq =", freq_mapping[[1]])
sample_tree <- iqtree$GrI %>%
ggtree(layout = "roundrect", aes(color = "#A4E473")) +
geom_tiplab(size = 3, color = "#004651") +
geom_tippoint(color = "#66CC8A") +
theme(legend.position = "none")
sample_tree
beta <- get_artifact_data("./results/7-Diversity",
list(Merged = NULL),
extension = "",
metric_list = beta_metrics
)
pcoa2D <- get_artifact_data("./results/8-Analysis",
list(Merged = NULL),
extension = "PCOA-2D_",
metric_list = beta_metrics
)
pcoa2D_merged <- lapply(pcoa2D$Merged, metadata_merge_pcoa, metadata = metadata)
pcoaja <- plot_pcoa(pcoa2D_merged$ja, "Location",
title = "Jaccard distance"
)
pcoabc <- plot_pcoa(pcoa2D_merged$bc, "Location",
title = "Bray Curtis"
)
pcoauu <- plot_pcoa(pcoa2D_merged$uu, "Location",
title = "Unweighted Unifrac",
)
pcoawn <- plot_pcoa(pcoa2D_merged$wn, "Location",
title = "Weighted Normalized Unifrac ",
)
pcoa_arrange <- ggarrange(pcoabc, pcoauu, pcoawn,
ncol = 3,
common.legend = TRUE,
legend = "right"
)
pcoa_arrange
- The pcoa plots depict clearly how the choice of distance metric affects the clustering of samples. - Clustering in the Bray Curtis plot represents sites sharing many of the same species and in similar abundances. The big cluster falls apart once we factor the evolutionary distance between OTUs, shown by the Unifrac metrics. - This implies that although some taxa are shared, the unique taxa are a evolutionary distant (essentially have very different DNA) from the shared ones. - Once we add weightings by abundance though, new clusters form, indicating there are many more common taxa within the groups than there are unique taxa. - Even partitioning sites by habitat type - glacier and permafrost, doesn’t help
# Filter the sites
keep <- c("ViS", "BrL", "BrH", "SvG")
filter_wn <- filter_dm(beta$Merged$wn, keep)
filtered_meta <- filter_meta(metadata, keep)
Within the chosen cluster, its visually unclear whether or not the differences between sites are significant. There are specific hypothesis tests for this problem, such as Permutational Analysis of Variance (PERMANOVA) and Analysis of Similarities (ANOSIM)
They can be thought of as quantitative analyses of beta diversity: allowing you to say with confidence whether or not certain variables (in this case location) are generating a statistically significant difference in species composition
Both PERMANOVA and ANOSIM test the null hypothesis that within-group distances from each group are identical to between-group distance. With this dataset, rejecting the null hypothesis and concluding that between-group distance is different from within-group distance will let us conclude that different locations do have statistically significant species distribution. - The difference between them is… TODO:what is the difference between them?
adonis2(filter_wn ~ filtered_meta$Location)
anosim(filter_wn, grouping = filtered_meta$Location)
##
## Call:
## anosim(x = filter_wn, grouping = filtered_meta$Location)
## Dissimilarity:
##
## ANOSIM statistic R: 0.3687
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
# Export for far pro tax database
otu_genus <- list()
for (id in names(id_key)) {
otu_genus[[id]] <- to_genus_csv(otu_freqs[[id]], blast[[id]])
}
genus_combined <- combine_freqs(otu_genus, taxon)
write.csv(combined, "genus_otu_tables.csv", row.names = FALSE)
The raw output tables here are the absolute abundances of inferred biological pathways in each site based on the MetaCyc database. PICRUSt2 works by first predicting important reactions for metabolism in the site (using KEGG Orthology (KO) and Enzyme Commission numbers (EC)) format, then using their abundances for pathway inference. A pathway is essentially a set of reactions working together for a specific purpose, such as energy storage or synthesis. - Originally I had planned to use the farprotax database, but the script the authors provided didn’t work. - I encountered another problem where PICRUSt2 failed to analyze the merged dataset so I had to combine the output tables from each sample.
ko_all <- ko %>%
# Merge the PICRUSt2 tables
reduce(merge, by = "pathway", all = TRUE) %>%
as_tibble() %>%
replace(is.na(.), 0) %>%
rel_abund(., pathway) %>% # Convert to relative abundances
as_tibble()
ko_all %>% dim() # There are 438 inferred pathways
## [1] 438 131
ko_xfunc <- ko_all %>% sites_x_func() # Transpose into sites x function format
ko_xfunc
# Compute bray curtis distance, then plot pcoa
bc_func <- vegdist(ko_xfunc, method = "bray")
pcoa_bc_func <- bc_func %>%
wcmdscale(k = 2) %>%
metadata_merge_pcoa(metadata, ., functions = TRUE)
## Joining with `by = join_by(sample.id)`
# Compute jaccard distance
ja_func <- vegdist(ko_xfunc, method = "jaccard")
pcoa_ja_func <- ja_func %>%
wcmdscale(k = 2) %>%
metadata_merge_pcoa(metadata, ., functions = TRUE)
## Joining with `by = join_by(sample.id)`
# Plot and compare with ordination on taxonomy
plot_ja_func <- plot_pcoa(pcoa_ja_func, "Location",
title = "Jaccard distance",
subtitle = "From biological pathways", functions = TRUE
)
plot_bc_func <- plot_pcoa(pcoa_bc_func, "Location",
title = "Bray Curtis",
subtitle = "From biological pathways", functions = TRUE
)
func_compare <- ggarrange(plot_ja_func, pcoaja, plot_bc_func, pcoabc,
ncol = 2, nrow = 2, common.legend = TRUE, legend = "bottom"
)
func_compare
Finally, I will perform tests on the abundance of identified OTUs and the predicted pathways in the previously selected sites. Differential abundance analysis this refers to identifying which otus are more or less present between different samples. Traditional statistical tests are inappropriate for this application generally because the data is so sparse.
I will be using the Analysis of Microbiomes with Bias correction test (ANCOM-BC). To get an idea of what to expect from the test, I’ll produce a bar plot depicting the relative phylum-level abundances between the sites.
all <- read_qza("./results/2-OTUs/Merged-otuFreqs.qza")$data
sk_merged <- read_qza("./results/3-Classified/Merged-Sklearn.qza")$data %>%
parse_taxonomy()
ranks <- merge_with_id(all, sk_merged, level = 2) %>%
filter(!(is.na(taxon))) %>%
group_by(taxon) %>%
summarise(across(everything(), sum))
not_bacteria <- c(
"Arthropoda", "Nanoarchaeota", "Diatomea", "Altiarchaeota",
"Ascomycota", "Basidiomycota", "Cercozoa", "Ciliophora", "Asgardarchaeota",
"Phragmoplastophyta", "Euryarchaeota", "Crenarchaeota"
)
shown_paths <- c(
"CHLOROPHYLL-SYN", "GLYCOLYSIS", "TCA", "CALVIN-PWY",
"PENTOSE-P-PWY", "METHANOGENESIS-PWY", "DENITRIFICATION-PWY", "FERMENTATION-PWY",
"LACTOSECAT-PWY", "METH-ACETATE-PWY"
)
nice_paths <- ko_all %>%
filter((grepl(paste(shown_paths, collapse = "|"), pathway)))
tax_sum <- sum_by_site(ranks, id_key, "taxon", not_bacteria)
path_sum <- sum_by_site(nice_paths, id_key, "pathway", NaN)
stacked <- tax_sum %>% ggplot(., aes(x = name, y = value, fill = identifier)) +
geom_bar(stat = "identity") +
scale_fill_discrete(name = "Phylum") +
scale_color_paletteer_d("pals::glasbey") +
labs(
x = "Site", y = "Relative abundance",
subtitle = "*Putative phyla and false positives
(non-prokaryotes) removed"
)
stacked
heat_path <- path_sum %>% ggplot(., aes(x = name, y = identifier, fill = value)) +
geom_tile() +
scale_fill_gradient2(
name = "Relative abundance",
mid = "seagreen1", low = "springgreen", high = "seagreen"
) +
labs(
x = "Site", y = "Pathway",
)
heat_path
Going to the second-most inclusive taxonomic rank was essential here, given that there are 10668 OTUs total. Still, I noticed there were a few identifications that weren’t prokaryotic. These are most likely false positives, given the specificity of the 16s rRNA primers used to sequence the samples.
From the plot, the TODO:Which look abundant?
There wouldn’t be much point to testing differential abundance for sites that we already know to be similar based on the weighted unifrac plots, so I stuck with the cluster of sites that ANOSIM and PERMANOVA were conducted on
# First we prepare the TreeSummarizedExperiment object
tax_info <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
wrong_tax <- c("Unassigned", "Arthropoda", "Bacteria", "Insecta")
filtered_otus <- read_qza("./results/2-OTUs/Merged-otuFreqs.qza")$data %>%
as.data.frame() # %>%
# select(matches(keep))
chosen_paths <- ko_all %>%
select(c(1, matches(keep))) %>%
column_to_rownames(., var = "pathway")
# Format the metadata
formatted_meta_paths <- filtered_meta[match(colnames(chosen_paths), filtered_meta$sample.id), ] %>%
`rownames<-`(NULL) %>%
column_to_rownames(., var = "sample.id") %>%
sample_data()
phylo_path <- phyloseq(
otu_table(chosen_paths, taxa_are_rows = TRUE),
formatted_meta_paths
)
matched_tax <- sk_merged[match(
rownames(filtered_otus),
rownames(sk_merged)
), ] %>%
tax_table() %>%
`colnames<-`(tax_info)
## Warning in .local(object): Coercing from data.frame class to character matrix
## prior to building taxonomyTable.
## This could introduce artifacts.
## Check your taxonomyTable, or coerce to matrix manually.
# All of this is necessary because the TSE object won't be
# created properly unless the indices of the rownames match precisely
formatted_meta <- metadata[match(colnames(filtered_otus), metadata$sample.id), ] %>%
`rownames<-`(NULL) %>%
column_to_rownames(., var = "sample.id") %>%
sample_data()
rownames(filtered_otus) <- rownames(matched_tax)
my_phylo <- phyloseq(
otu_table(filtered_otus, taxa_are_rows = TRUE), formatted_meta,
matched_tax
)
tse <- mia::makeTreeSummarizedExperimentFromPhyloseq(my_phylo)
# Now run the test, testing differential abundance between locations
tse_paths <- mia::makeTreeSummarizedExperimentFromPhyloseq(phylo_path)
var <- "Type"
chosen_rank <- "Class"
abc <- ancombc2(
data = tse, assay_name = "counts", tax_level = chosen_rank,
fix_formula = var, global = TRUE, group = var,
pairwise = TRUE
)
# Plot the pairwise comparisons
abc_paths <- ancombc2(
data = tse_paths, assay_name = "counts", tax_level = "Species",
fix_formula = "Location"
)
abc_res <- abc$res_pair %>% select(-(grep("Intercept", colnames(abc$res_pair))))
diff_abund <- ancombc_select(abc_res, glue("diff_{var}"), chosen_rank, wrong_tax)
## taxon diff_TypeOther diff_TypePermafrost
## 1 Class:Gammaproteobacteria TRUE TRUE
## 2 Class:Alphaproteobacteria TRUE TRUE
## 3 Class:Ignavibacteria TRUE TRUE
## 4 Class:Bacteroidia TRUE FALSE
## 5 Class:Acidobacteriae FALSE FALSE
## 6 Class:Desulfuromonadia FALSE TRUE
## 7 Class:Thermoleophilia FALSE FALSE
## 8 Class:Anaerolineae TRUE FALSE
## 9 Class:Elusimicrobia FALSE FALSE
## 10 Class:Clostridia TRUE FALSE
## 11 Class:Desulfitobacteriia TRUE TRUE
## 12 Class:Holophagae TRUE FALSE
## 13 Class:Actinobacteria TRUE FALSE
## 14 Class:Verrucomicrobiae FALSE FALSE
## 15 Class:Polyangia FALSE FALSE
## 16 Class:Oligoflexia TRUE TRUE
## 17 Class:Thermodesulfovibrionia TRUE TRUE
## 18 Class:Acidimicrobiia FALSE FALSE
## 19 Class:Planctomycetes FALSE FALSE
## 20 Class:Cyanobacteriia FALSE TRUE
## 21 Class:TK10 FALSE TRUE
## 22 Class:Abditibacteria FALSE FALSE
## 23 Class:Dehalococcoidia TRUE FALSE
## 24 Class:Chloroflexia TRUE FALSE
## 25 Class:Phycisphaerae TRUE FALSE
## 26 Class:Desulfobulbia TRUE TRUE
## 27 Class:Chlamydiae FALSE FALSE
## 28 Class:Bacilli FALSE TRUE
## 29 Class:Gemmatimonadetes TRUE FALSE
## 30 Class:Syntrophia FALSE TRUE
## 31 Class:Armatimonadia FALSE TRUE
## 32 Class:Desulfobacteria TRUE TRUE
## 33 Class:4-29-1 TRUE TRUE
## 34 Class:Fimbriimonadia TRUE TRUE
## 35 Class:Parcubacteria TRUE FALSE
## 36 Class:Chthonomonadetes FALSE TRUE
## 37 Class:Microgenomatia FALSE FALSE
## 38 Class:Vicinamibacteria FALSE FALSE
## 39 Class:Syntrophorhabdia FALSE FALSE
## 40 Class:WS4 FALSE FALSE
## 41 Class:Negativicutes FALSE FALSE
## 42 Class:vadinHA49 FALSE FALSE
## 43 Class:ABY1 FALSE TRUE
## 44 Class:Vampirivibrionia FALSE FALSE
## 45 Class:Caldisericia FALSE TRUE
## 46 Class:Coriobacteriia FALSE TRUE
## 47 Class:Lineage_IIa FALSE FALSE
## 48 Class:Lineage_IIb FALSE FALSE
## 49 Class:KD4-96 TRUE FALSE
## 50 Class:OM190 FALSE TRUE
## 51 Class:Latescibacterota FALSE TRUE
## 52 Class:uncultured TRUE TRUE
## 53 Class:WCHB1-81 FALSE FALSE
## 54 Class:NB1-j TRUE TRUE
## 55 Class:Fibrobacteria FALSE FALSE
## 56 Class:Saccharimonadia TRUE TRUE
## 57 Class:Nanoarchaeia TRUE TRUE
## 58 Class:Berkelbacteria TRUE TRUE
## 59 Class:OLB14 FALSE TRUE
## 60 Class:Blastocatellia FALSE FALSE
## 61 Class:Kryptonia FALSE FALSE
## 62 Class:BD7-11 FALSE TRUE
## 63 Class:Spirochaetia FALSE FALSE
## 64 Class:Desulfobaccia TRUE TRUE
## 65 Class:Omnitrophia FALSE TRUE
## 66 Class:Lentisphaeria FALSE FALSE
## 67 Class:Myxococcia FALSE FALSE
## 68 Class:RCP2-54 TRUE TRUE
## 69 Class:Bathyarchaeia FALSE FALSE
## 70 Class:Syntrophobacteria FALSE FALSE
## 71 Class:Babeliae FALSE FALSE
## 72 Class:Kiritimatiellae FALSE FALSE
## 73 Class:Aminicenantia TRUE FALSE
## 74 Class:uncultured_1 TRUE FALSE
## 75 Class:Thermoanaerobaculia FALSE FALSE
## 76 Class:JG30-KF-CM66 TRUE TRUE
## 77 Class:WPS-2 TRUE TRUE
## 78 Class:SJA-28 FALSE FALSE
## 79 Class:MBNT15 FALSE FALSE
## 80 Class:Bdellovibrionia TRUE FALSE
## 81 Class:Gracilibacteria TRUE TRUE
## 82 Class:Kapabacteria FALSE FALSE
## 83 Class:Ktedonobacteria FALSE FALSE
## 84 Class:Thermoplasmata TRUE TRUE
## 85 Class:Methanomicrobia TRUE FALSE
## 86 Class:Syntrophomonadia FALSE FALSE
## 87 Class:FCPU426 TRUE TRUE
## 88 Class:Subgroup_18 FALSE FALSE
## 89 Class:SHA-26 TRUE TRUE
## 90 Class:Sericytochromatia TRUE TRUE
## 91 Class:Hydrogenedentia TRUE TRUE
## 92 Class:Campylobacteria TRUE TRUE
## 93 Class:TA06 TRUE TRUE
## 94 Class:Sumerlaeia FALSE FALSE
## 95 Class:Deinococci TRUE TRUE
## 96 Class:Methylomirabilia FALSE FALSE
## 97 Class:Synergistia FALSE FALSE
## 98 Class:Methanosarcinia FALSE FALSE
## 99 Class:Nitrospiria FALSE TRUE
## 100 Class:Methanobacteria FALSE TRUE
## diff_TypePermafrost_TypeOther
## 1 FALSE
## 2 FALSE
## 3 TRUE
## 4 FALSE
## 5 FALSE
## 6 TRUE
## 7 FALSE
## 8 TRUE
## 9 FALSE
## 10 TRUE
## 11 FALSE
## 12 TRUE
## 13 FALSE
## 14 TRUE
## 15 FALSE
## 16 FALSE
## 17 FALSE
## 18 FALSE
## 19 FALSE
## 20 TRUE
## 21 FALSE
## 22 FALSE
## 23 FALSE
## 24 TRUE
## 25 TRUE
## 26 TRUE
## 27 TRUE
## 28 FALSE
## 29 FALSE
## 30 TRUE
## 31 TRUE
## 32 TRUE
## 33 FALSE
## 34 FALSE
## 35 TRUE
## 36 FALSE
## 37 FALSE
## 38 FALSE
## 39 FALSE
## 40 FALSE
## 41 TRUE
## 42 FALSE
## 43 FALSE
## 44 FALSE
## 45 TRUE
## 46 TRUE
## 47 FALSE
## 48 FALSE
## 49 TRUE
## 50 TRUE
## 51 FALSE
## 52 TRUE
## 53 FALSE
## 54 TRUE
## 55 FALSE
## 56 FALSE
## 57 FALSE
## 58 TRUE
## 59 TRUE
## 60 FALSE
## 61 FALSE
## 62 TRUE
## 63 FALSE
## 64 FALSE
## 65 FALSE
## 66 FALSE
## 67 FALSE
## 68 TRUE
## 69 TRUE
## 70 FALSE
## 71 FALSE
## 72 FALSE
## 73 FALSE
## 74 FALSE
## 75 FALSE
## 76 TRUE
## 77 FALSE
## 78 FALSE
## 79 FALSE
## 80 FALSE
## 81 FALSE
## 82 FALSE
## 83 FALSE
## 84 FALSE
## 85 TRUE
## 86 FALSE
## 87 TRUE
## 88 FALSE
## 89 TRUE
## 90 TRUE
## 91 FALSE
## 92 TRUE
## 93 FALSE
## 94 FALSE
## 95 TRUE
## 96 FALSE
## 97 FALSE
## 98 FALSE
## 99 TRUE
## 100 TRUE
se <- ancombc_select(abc_res, glue("se_{var}"), chosen_rank, wrong_tax)
## taxon se_TypeOther se_TypePermafrost
## 1 Class:Gammaproteobacteria 0.3847080 0.4665029
## 2 Class:Alphaproteobacteria 0.4087152 0.4947957
## 3 Class:Ignavibacteria 0.3453434 0.4201332
## 4 Class:Bacteroidia 0.4350826 0.5258802
## 5 Class:Acidobacteriae 0.5857075 0.7036097
## 6 Class:Desulfuromonadia 0.3555546 0.4321588
## 7 Class:Thermoleophilia 0.4729832 0.5705779
## 8 Class:Anaerolineae 0.4603346 0.5556589
## 9 Class:Elusimicrobia 0.2217932 0.2747583
## 10 Class:Clostridia 0.4964742 0.5982906
## 11 Class:Desulfitobacteriia 0.3971182 0.4811274
## 12 Class:Holophagae 0.4963517 0.5981462
## 13 Class:Actinobacteria 0.4033963 0.4885265
## 14 Class:Verrucomicrobiae 0.5669259 0.6814368
## 15 Class:Polyangia 0.4705891 0.5677540
## 16 Class:Oligoflexia 0.3041350 0.3716230
## 17 Class:Thermodesulfovibrionia 0.3121296 0.3810316
## 18 Class:Acidimicrobiia 0.4357540 0.5266718
## 19 Class:Planctomycetes 0.5218037 0.6281788
## 20 Class:Cyanobacteriia 0.5467127 0.6575769
## 21 Class:TK10 0.2204034 0.2731227
## 22 Class:Abditibacteria 0.2216204 0.2745549
## 23 Class:Dehalococcoidia 0.3238298 0.3948034
## 24 Class:Chloroflexia 0.3352594 0.4082593
## 25 Class:Phycisphaerae 0.4082909 0.4942956
## 26 Class:Desulfobulbia 0.2952253 0.3611389
## 27 Class:Chlamydiae 0.3355988 0.4086589
## 28 Class:Bacilli 0.4678507 0.5645239
## 29 Class:Gemmatimonadetes 0.4695381 0.5665142
## 30 Class:Syntrophia 0.3918298 0.4748951
## 31 Class:Armatimonadia 0.3468662 0.4219265
## 32 Class:Desulfobacteria 0.1747615 0.2192702
## 33 Class:4-29-1 0.3118216 0.3806691
## 34 Class:Fimbriimonadia 0.2580615 0.3174210
## 35 Class:Parcubacteria 0.2612333 0.3211516
## 36 Class:Chthonomonadetes 0.2117844 0.2629771
## 37 Class:Microgenomatia 0.2383958 0.2942908
## 38 Class:Vicinamibacteria 0.5177265 0.6233674
## 39 Class:Syntrophorhabdia 0.2627666 0.3229551
## 40 Class:WS4 0.2141088 0.2657137
## 41 Class:Negativicutes 0.3496483 0.4252027
## 42 Class:vadinHA49 0.1783101 0.2234759
## 43 Class:ABY1 0.1848031 0.2311584
## 44 Class:Vampirivibrionia 0.2064139 0.2566523
## 45 Class:Caldisericia 0.3451860 0.4199478
## 46 Class:Coriobacteriia 0.3389906 0.4126526
## 47 Class:Lineage_IIa 0.3360990 0.4092479
## 48 Class:Lineage_IIb 0.1925943 0.2403602
## 49 Class:KD4-96 0.4526056 0.5465434
## 50 Class:OM190 0.2457155 0.3029002
## 51 Class:Latescibacterota 0.3593841 0.4366693
## 52 Class:uncultured 0.2483037 0.3059444
## 53 Class:WCHB1-81 0.2286988 0.2828835
## 54 Class:NB1-j 0.2203192 0.2730237
## 55 Class:Fibrobacteria 0.2464245 0.3037341
## 56 Class:Saccharimonadia 0.3128389 0.3818664
## 57 Class:Nanoarchaeia 0.2480777 0.3056786
## 58 Class:Berkelbacteria 0.1606679 0.2024900
## 59 Class:OLB14 0.1833705 0.2294646
## 60 Class:Blastocatellia 0.3594943 0.4367990
## 61 Class:Kryptonia 0.3055033 0.3732333
## 62 Class:BD7-11 0.1513419 0.1912718
## 63 Class:Spirochaetia 0.2725547 0.3344681
## 64 Class:Desulfobaccia 0.2766144 0.3392436
## 65 Class:Omnitrophia 0.2785289 0.3414957
## 66 Class:Lentisphaeria 0.2200849 0.2727479
## 67 Class:Myxococcia 0.3997010 0.4841713
## 68 Class:RCP2-54 0.3109724 0.3796696
## 69 Class:Bathyarchaeia 0.2948211 0.3606632
## 70 Class:Syntrophobacteria 0.2321906 0.2869915
## 71 Class:Babeliae 0.3429590 0.4173254
## 72 Class:Kiritimatiellae 0.1908311 0.2382790
## 73 Class:Aminicenantia 0.3143810 0.3836814
## 74 Class:uncultured_1 0.2942370 0.3599760
## 75 Class:Thermoanaerobaculia 0.3446046 0.4192632
## 76 Class:JG30-KF-CM66 0.2598224 0.3194922
## 77 Class:WPS-2 0.3959218 0.4797174
## 78 Class:SJA-28 0.3481344 0.4234199
## 79 Class:MBNT15 0.3955909 0.4793273
## 80 Class:Bdellovibrionia 0.3198407 0.3901078
## 81 Class:Gracilibacteria 0.2653928 0.3260440
## 82 Class:Kapabacteria 0.2345840 0.2898070
## 83 Class:Ktedonobacteria 0.2302796 0.2847433
## 84 Class:Thermoplasmata 0.2611956 0.3211073
## 85 Class:Methanomicrobia 0.3091441 0.3775180
## 86 Class:Syntrophomonadia 0.1950374 0.2432427
## 87 Class:FCPU426 0.1418229 0.1796212
## 88 Class:Subgroup_18 0.1909112 0.2383736
## 89 Class:SHA-26 0.1919086 0.2395508
## 90 Class:Sericytochromatia 0.2231683 0.2763764
## 91 Class:Hydrogenedentia 0.2480355 0.3056289
## 92 Class:Campylobacteria 0.1869037 0.2336409
## 93 Class:TA06 0.2162483 0.2682324
## 94 Class:Sumerlaeia 0.1754838 0.2201268
## 95 Class:Deinococci 0.2973737 0.3636668
## 96 Class:Methylomirabilia 0.3326393 0.4051744
## 97 Class:Synergistia 0.1965854 0.2450685
## 98 Class:Methanosarcinia 0.2539627 0.3126002
## 99 Class:Nitrospiria 0.3088203 0.3771368
## 100 Class:Methanobacteria 0.2908400 0.3559791
## se_TypePermafrost_TypeOther
## 1 0.4688969
## 2 0.4940810
## 3 0.4278639
## 4 0.5218578
## 5 0.6822037
## 6 0.4384733
## 7 0.5619666
## 8 0.5485593
## 9 0.3021706
## 10 0.5869185
## 11 0.4819019
## 12 0.5867883
## 13 0.4884921
## 14 0.6620916
## 15 0.5594273
## 16 0.3853371
## 17 0.3935479
## 18 0.5225665
## 19 0.6138914
## 20 0.6404777
## 21 0.3007920
## 22 0.3019992
## 23 0.4056004
## 24 0.4174127
## 25 0.4936350
## 26 0.3762111
## 27 0.4177640
## 28 0.5565237
## 29 0.5583128
## 30 0.4763564
## 31 0.4294444
## 32 0.2559419
## 33 0.3932313
## 34 0.3384573
## 35 0.3416582
## 36 0.2922622
## 37 0.3187081
## 38 0.6095453
## 39 0.3432071
## 40 0.2945593
## 41 0.4323335
## 42 0.2594100
## 43 0.2657574
## 44 0.2869641
## 45 0.4277005
## 46 0.4212766
## 47 0.4182818
## 48 0.2733850
## 49 0.5403771
## 50 0.3260389
## 51 0.4424588
## 52 0.3286368
## 53 0.3090337
## 54 0.3007086
## 55 0.3267503
## 56 0.3942774
## 57 0.3284099
## 58 0.2421320
## 59 0.2643564
## 60 0.4425735
## 61 0.3867410
## 62 0.2328808
## 63 0.3531171
## 64 0.3572385
## 65 0.3591843
## 66 0.3004762
## 67 0.4846123
## 68 0.3923582
## 69 0.3757977
## 70 0.3125124
## 71 0.4253902
## 72 0.2716573
## 73 0.3958639
## 74 0.3752005
## 75 0.4270973
## 76 0.3402339
## 77 0.4806469
## 78 0.4307612
## 79 0.4802998
## 80 0.4014867
## 81 0.3458622
## 82 0.3149000
## 83 0.3106079
## 84 0.3416201
## 85 0.3904794
## 86 0.2757804
## 87 0.2231513
## 88 0.2717358
## 89 0.2727130
## 90 0.3035355
## 91 0.3283675
## 92 0.2678124
## 93 0.2966758
## 94 0.2566479
## 95 0.3784092
## 96 0.4147016
## 97 0.2772993
## 98 0.3343272
## 99 0.3901466
## 100 0.3717293
# abc_res %>% filter(taxon == "Class:Alphaproteobacteria")
lfc <- ancombc_select(abc_res, glue("lfc_{var}"), chosen_rank, wrong_tax) %>%
merge(se, by = c("name", "taxon")) %>%
merge(diff_abund, by = c("name", "taxon")) %>%
filter((!!as.symbol(glue("diff_{var}"))) == TRUE) # Don't want to show taxa that don't have statistically
## taxon lfc_TypeOther lfc_TypePermafrost
## 1 Class:Gammaproteobacteria -1.58639694 -1.88516422
## 2 Class:Alphaproteobacteria -1.75483556 -1.47950071
## 3 Class:Ignavibacteria -0.86377144 1.10485259
## 4 Class:Bacteroidia -1.58601913 -0.91392558
## 5 Class:Acidobacteriae -0.40020774 0.93779489
## 6 Class:Desulfuromonadia 0.14654881 2.36102842
## 7 Class:Thermoleophilia 0.19481034 0.37665739
## 8 Class:Anaerolineae -1.24667292 1.14375909
## 9 Class:Elusimicrobia -0.11622983 0.44780832
## 10 Class:Clostridia -2.53616934 0.19916709
## 11 Class:Desulfitobacteriia -2.04365751 -1.92574500
## 12 Class:Holophagae -1.23945204 0.72629274
## 13 Class:Actinobacteria -1.61847830 -0.85702972
## 14 Class:Verrucomicrobiae -0.54961239 1.38209753
## 15 Class:Polyangia -0.14733200 -0.02151696
## 16 Class:Oligoflexia -1.83686387 -1.74608950
## 17 Class:Thermodesulfovibrionia -1.66404405 -1.56142926
## 18 Class:Acidimicrobiia -0.11124616 0.32581372
## 19 Class:Planctomycetes -0.29405939 -0.01391663
## 20 Class:Cyanobacteriia 1.16356868 -3.78398088
## 21 Class:TK10 -0.53889679 -0.94562776
## 22 Class:Abditibacteria -0.33395119 -0.53834739
## 23 Class:Dehalococcoidia -1.55880419 -0.62911687
## 24 Class:Chloroflexia 0.96298151 -0.73211096
## 25 Class:Phycisphaerae -1.07328122 0.61715446
## 26 Class:Desulfobulbia -1.34180078 -2.54058107
## 27 Class:Chlamydiae 0.82653256 -0.45788490
## 28 Class:Bacilli -1.07899584 -1.44320629
## 29 Class:Gemmatimonadetes -1.69801319 -0.52628060
## 30 Class:Syntrophia -0.62480828 2.18965634
## 31 Class:Armatimonadia -0.52395165 -1.63107304
## 32 Class:Desulfobacteria -0.52247730 -1.69335817
## 33 Class:4-29-1 -1.57102153 -1.60249519
## 34 Class:Fimbriimonadia -0.92814278 -1.44805301
## 35 Class:Parcubacteria -1.43888110 0.69664095
## 36 Class:Chthonomonadetes -0.45077983 -0.69388775
## 37 Class:Microgenomatia -0.64355234 -0.31184965
## 38 Class:Vicinamibacteria 0.04044207 0.64420874
## 39 Class:Syntrophorhabdia -0.19349606 0.50285514
## 40 Class:WS4 -0.04614480 -0.36956875
## 41 Class:Negativicutes -0.74906633 0.41158395
## 42 Class:vadinHA49 0.04867115 -0.47316581
## 43 Class:ABY1 -0.23399913 -0.66085270
## 44 Class:Vampirivibrionia 0.00725604 0.07608929
## 45 Class:Caldisericia 0.39574640 1.53195403
## 46 Class:Coriobacteriia -0.61133570 1.10982731
## 47 Class:Lineage_IIa 0.75960399 0.73482277
## 48 Class:Lineage_IIb 0.19033521 -0.54761668
## 49 Class:KD4-96 -2.18077518 0.67828577
## 50 Class:OM190 -0.03221447 -1.47305136
## 51 Class:Latescibacterota -0.49205384 -1.48455314
## 52 Class:uncultured 0.64461133 -1.13717511
## 53 Class:WCHB1-81 0.03036095 0.52792579
## 54 Class:NB1-j -0.96579004 -2.45848583
## 55 Class:Fibrobacteria 0.19028766 0.73009702
## 56 Class:Saccharimonadia -1.27799031 -1.71882056
## 57 Class:Nanoarchaeia -0.78417453 -0.75792448
## 58 Class:Berkelbacteria -0.69332552 -2.17242237
## 59 Class:OLB14 -0.42520407 -1.18884668
## 60 Class:Blastocatellia 0.27774527 0.15224929
## 61 Class:Kryptonia -0.70331361 -0.36609923
## 62 Class:BD7-11 0.33121583 -1.31713050
## 63 Class:Spirochaetia 0.37293631 0.82945182
## 64 Class:Desulfobaccia -0.88434090 -0.96124200
## 65 Class:Omnitrophia 0.44134343 0.95952252
## 66 Class:Lentisphaeria 0.00725604 0.49624093
## 67 Class:Myxococcia 0.14660473 -0.05359345
## 68 Class:RCP2-54 0.81372639 -0.93118423
## 69 Class:Bathyarchaeia -0.64870416 0.47361685
## 70 Class:Syntrophobacteria 0.00725604 0.31112133
## 71 Class:Babeliae -0.32095195 -0.96033625
## 72 Class:Kiritimatiellae 0.03036095 0.18271326
## 73 Class:Aminicenantia -1.03264830 -0.53449045
## 74 Class:uncultured_1 -0.99458595 -0.17521885
## 75 Class:Thermoanaerobaculia -0.36087506 -0.64903227
## 76 Class:JG30-KF-CM66 -1.09659148 -2.39423402
## 77 Class:WPS-2 -1.53371447 -1.26961898
## 78 Class:SJA-28 -0.90337152 -0.05954530
## 79 Class:MBNT15 -0.48716606 0.33876418
## 80 Class:Bdellovibrionia -0.96855486 -0.52370971
## 81 Class:Gracilibacteria -0.99307173 -1.29200116
## 82 Class:Kapabacteria 0.07969609 -0.66173898
## 83 Class:Ktedonobacteria 0.47074616 -0.35562854
## 84 Class:Thermoplasmata -1.09983514 -1.82710939
## 85 Class:Methanomicrobia -0.88230912 0.52112565
## 86 Class:Syntrophomonadia 0.02556624 -0.14758736
## 87 Class:FCPU426 0.33453397 -1.37171746
## 88 Class:Subgroup_18 0.00725604 -0.05006725
## 89 Class:SHA-26 -0.76520695 -1.97406896
## 90 Class:Sericytochromatia -0.94784841 -2.10742685
## 91 Class:Hydrogenedentia -0.86621310 -1.60227755
## 92 Class:Campylobacteria -0.52169007 -1.97302329
## 93 Class:TA06 -0.55712016 -1.21901238
## 94 Class:Sumerlaeia 0.13805208 -0.31972315
## 95 Class:Deinococci -1.69965782 -3.73623639
## 96 Class:Methylomirabilia 0.84994170 0.54972471
## 97 Class:Synergistia 0.06022360 -0.25101837
## 98 Class:Methanosarcinia -0.50553623 0.25637936
## 99 Class:Nitrospiria 0.03195660 -2.16568903
## 100 Class:Methanobacteria -0.08015665 1.74712721
## lfc_TypePermafrost_TypeOther
## 1 -0.29876729
## 2 0.27533485
## 3 1.96862402
## 4 0.67209355
## 5 1.33800263
## 6 2.21447961
## 7 0.18184705
## 8 2.39043201
## 9 0.56403816
## 10 2.73533643
## 11 0.11791250
## 12 1.96574477
## 13 0.76144858
## 14 1.93170991
## 15 0.12581504
## 16 0.09077436
## 17 0.10261479
## 18 0.43705989
## 19 0.28014277
## 20 -4.94754956
## 21 -0.40673097
## 22 -0.20439620
## 23 0.92968732
## 24 -1.69509247
## 25 1.69043569
## 26 -1.19878030
## 27 -1.28441746
## 28 -0.36421044
## 29 1.17173260
## 30 2.81446462
## 31 -1.10712140
## 32 -1.17088087
## 33 -0.03147366
## 34 -0.51991022
## 35 2.13552205
## 36 -0.24310791
## 37 0.33170269
## 38 0.60376667
## 39 0.69635121
## 40 -0.32342395
## 41 1.16065027
## 42 -0.52183696
## 43 -0.42685357
## 44 0.06883325
## 45 1.13620763
## 46 1.72116300
## 47 -0.02478122
## 48 -0.73795189
## 49 2.85906095
## 50 -1.44083689
## 51 -0.99249930
## 52 -1.78178644
## 53 0.49756484
## 54 -1.49269579
## 55 0.53980936
## 56 -0.44083025
## 57 0.02625005
## 58 -1.47909685
## 59 -0.76364261
## 60 -0.12549598
## 61 0.33721438
## 62 -1.64834634
## 63 0.45651551
## 64 -0.07690110
## 65 0.51817908
## 66 0.48898489
## 67 -0.20019818
## 68 -1.74491062
## 69 1.12232101
## 70 0.30386529
## 71 -0.63938430
## 72 0.15235231
## 73 0.49815786
## 74 0.81936710
## 75 -0.28815721
## 76 -1.29764255
## 77 0.26409549
## 78 0.84382622
## 79 0.82593024
## 80 0.44484514
## 81 -0.29892943
## 82 -0.74143507
## 83 -0.82637470
## 84 -0.72727425
## 85 1.40343476
## 86 -0.17315361
## 87 -1.70625144
## 88 -0.05732328
## 89 -1.20886201
## 90 -1.15957844
## 91 -0.73606444
## 92 -1.45133321
## 93 -0.66189222
## 94 -0.45777523
## 95 -2.03657857
## 96 -0.30021698
## 97 -0.31124197
## 98 0.76191559
## 99 -2.19764563
## 100 1.82728386
# signifcant differences in log fold change
percent_abund <- dim(lfc)[1] / dim(se)[1]
dim(se)[1]
## [1] 300
dim(lfc)[1]
## [1] 124
# We keep the taxa with the highest and lowest log-fold changes between sites
lfc_stats <- lfc[[glue("lfc_{var}")]] %>% summary()
# lfc_q3 <- lfc_stats[["3rd Qu."]]
# lfc_q1 <- lfc_stats[["1st Qu."]]
# lfc %>% filter(lfc_Location == max(lfc_Location))
# lfc <- lfc[lfc[[glue("lfc_{var}")]] > lfc_q3 | lfc[[glue("lfc_{var}")]] < lfc_q1, ]
# message(glue("Percent of differentially abundant {chosen_rank}: {percent_abund}"))
# abc_plot <- abc_lfc_plot(lfc, var) + scale_fill_discrete(name = "Class")
# ggsave("./figures/abc_tax.png", abc_plot, width = 20, height = 10, dpi = 500)
# abc_res_paths <- abc_paths$res %>%
# select(-(grep("Intercept", colnames(abc_paths$res))))
# path_diff_abund <- ancombc_select(abc_res_paths, "diff_Location")
# path_se <- ancombc_select(abc_res_paths, "se_Location")
# path_lfc <- ancombc_select(abc_res_paths, "lfc_Location") %>%
# merge(path_se, by = c("name", "taxon")) %>%
# merge(path_diff_abund, by = c("name", "taxon")) %>%
# filter(diff_Location == TRUE)
#
# lfc_stats <- path_lfc$lfc_Location %>% summary()
# lfc_q3 <- lfc_stats[["3rd Qu."]]
# lfc_q1 <- lfc_stats[["1st Qu."]]
#
# percent_abund <- dim(path_lfc)[1] / dim(path_se)[1]
# path_lfc <- path_lfc[path_lfc$lfc_Location > lfc_q3 | path_lfc$lfc_Location < lfc_q1, ]
# message(glue("Percent of differentially abundant pathways: {percent_abund}"))
# abc_pathways <- abc_lfc_plot(path_lfc) + scale_fill_discrete(name = "Pathway")
# path_lfc %>% filter(lfc_Location == max(lfc_Location))
# ggsave("./figures/abc_paths.png", abc_pathways, width = 20, height = 10, dpi = 500)